package com.carl.main;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Console crawler for Douban's subject-search JSON endpoint.
 *
 * <p>For every combination of subject type and tag it requests
 * {@code https://movie.douban.com/j/search_subjects?type=...&tag=...&page_limit=50&page_start=0}
 * and prints the title and rating of each entry in the returned
 * {@code "subjects"} array.
 *
 * <p>Notes on the request:
 * <ul>
 *   <li>The endpoint returns JSON rather than HTML. Jsoup rejects such responses with an
 *       "Unhandled content type" error unless {@code ignoreContentType(true)} is set.</li>
 *   <li>A browser-like User-Agent header is sent to imitate a normal browser request.</li>
 * </ul>
 */
public class Day06_DoubanCrawler {

    /** Douban site: http://movie.douban.com — this is its JSON search endpoint (without query string). */
    private static final String SEARCH_URL = "https://movie.douban.com/j/search_subjects";

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36";

    /**
     * Crawls every (type, tag) combination and prints the results to standard output.
     *
     * @param args unused
     * @throws Exception if a request fails or a response cannot be parsed as JSON
     */
    public static void main(String[] args) throws Exception {
        String[] types = {"movie", "tv"};
        // Trim this list to a single tag for a quick run.
        String[] keys = {"热门", "国产剧", "综艺", "美剧", "日剧", "韩剧", "日本动画", "纪录片"};

        for (String type : types) {
            for (String key : keys) {
                JSONArray subjects = fetchSubjects(type, key);
                if (subjects != null) {
                    printSubjects(key, subjects);
                }
            }
            // Visual separator between subject types.
            System.out.println("\n $$$$$$$$$$$$$$$$ \n");
        }

    }

    /**
     * Requests the first 50 subjects for the given type and tag.
     *
     * @param type subject type sent as the {@code type} query parameter
     * @param tag  tag sent as the {@code tag} query parameter; may contain non-ASCII characters
     * @return the {@code "subjects"} array of the response, or {@code null} when the response
     *         is empty or has no such field
     * @throws Exception if the HTTP request fails or the body is not valid JSON
     */
    private static JSONArray fetchSubjects(String type, String tag) throws Exception {
        // Parameters go through data(...) so that Jsoup URL-encodes them into the GET query
        // string; the tags are Chinese text and must not be concatenated raw into the URL.
        // execute().body() returns the raw response text. Going through get()/Document.text()
        // would push the JSON through the HTML parser, which collapses whitespace and treats
        // any "<...>" inside the payload as markup.
        String jsonData = Jsoup.connect(SEARCH_URL)
                .data("type", type)
                .data("tag", tag)
                .data("page_limit", "50")
                .data("page_start", "0")
                .userAgent(USER_AGENT)
                .ignoreContentType(true)
                .execute()
                .body();

        JSONObject root = JSON.parseObject(jsonData);
        // Guard against a null parse result (e.g. an empty body) instead of dereferencing it.
        return root == null ? null : root.getJSONArray("subjects");
    }

    /**
     * Prints the number of subjects found for a tag, followed by one title/rating line per subject.
     *
     * @param key      the tag the subjects were fetched for
     * @param subjects the non-null {@code "subjects"} array returned by the endpoint
     */
    private static void printSubjects(String key, JSONArray subjects) {
        System.out.println("key:" + key + " ;size : " + subjects.size());
        for (int i = 0; i < subjects.size(); i++) {
            // Read the element directly rather than serializing it and parsing it again.
            JSONObject dataJson = subjects.getJSONObject(i);
            if (dataJson == null) {
                continue;
            }
            System.out.println("\t 标题 : " + dataJson.getString("title") +
                    " , \t 评分 : " + dataJson.getString("rate"));
        }
    }

}
